import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# --- Air quality: load, inspect, and plot pairwise relationships ---------
# Day/Month become the row index so they are left out of the correlation
# table and the scatter matrix below.
df = pd.read_csv('airquality.csv').set_index(['Day', 'Month'])

# Bare expressions: they only display output in an interactive session.
df.head()
df.corr()

# One scatter plot per pair of columns.
pd.plotting.scatter_matrix(df, figsize=(10, 10))
plt.show()
# --- Census: load the header-less CSV and encode categorical columns -----
columns = [
    'education',
    'age',
    'capital-gain',
    'race',
    'capital-loss',
    'hours-per-week',
    'sex',
    'classification',
]
census = pd.read_csv('census.csv', names=columns, header=None)

# Entries that cannot be parsed as numbers become NaN instead of raising.
census['capital-gain'] = pd.to_numeric(census['capital-gain'], errors='coerce')

# Replace the two categorical columns with their integer category codes.
for col in ('sex', 'classification'):
    census[col] = census[col].astype('category').cat.codes

census.head()
# --- Bar chart: number of records per race --------------------------------
race_counts = census['race'].value_counts()

plt.bar(race_counts.index, race_counts.values, color=['r', 'g', 'b', 'y', 'k'])
plt.xticks(rotation=90)
plt.title("Race Analysis")

# Print each count just above its bar.
for position, count in enumerate(race_counts.values):
    plt.text(x=position, y=count + 200, s=f"{count}")

plt.show()
# --- Bar chart: number of records per education level ---------------------
education_counts = census['education'].value_counts()

plt.bar(education_counts.index, education_counts.values, color='green')
# Dotted reference line at a count of 5000.
plt.axhline(5000, color='red', ls='dotted')
plt.xticks(rotation=90)
plt.title("Education")

# Print each count just above its bar.
for position, count in enumerate(education_counts.values):
    plt.text(x=position, y=count + 100, s=f"{count}")

plt.show()
# --- Pie chart: share of records per race ---------------------------------
plt.figure(figsize=(6, 6))
race_counts = census['race'].value_counts()

# Without `autopct`, plt.pie returns (wedge patches, label texts).
wedges, texts = plt.pie(race_counts.values, labels=race_counts.index)

# Hide the second wedge only; its label text is left untouched.
wedges[1].set_visible(False)
plt.show()
# --- Donut chart: share of records per education level --------------------
plt.figure(figsize=(6, 6))

# value_counts() sorts by frequency (descending), so dropping the last three
# rows removes the three least common education levels.  Counting once keeps
# the wedge sizes and their labels aligned.
education_counts = census['education'].value_counts().iloc[:-3]

plt.pie(
    education_counts.values,
    labels=education_counts.index,
    autopct="%0.2f",
    pctdistance=0.80,
)

# A white disc over the centre turns the pie into a donut.
hole = plt.Circle((0, 0), 0.6, facecolor='white')
plt.gcf().gca().add_artist(hole)

# Render this figure now, like every other chart in the file, instead of
# leaving it open until a later, unrelated plt.show() call.
plt.show()
# --- Nested donut: sex (outer ring) split by income class (inner ring) ----
# `sex` and `classification` hold integer category codes.  The fixed labels
# below assume code 1 = Male / 0 = Female and code 0 = "<50k" / 1 = ">50k"
# -- TODO confirm against the raw values in census.csv.
sex_order = [1, 0]      # Male first, then Female
class_order = [0, 1]    # "<50k" first, then ">50k"

# value_counts() orders its result by frequency, so pairing it positionally
# with fixed labels silently mislabels wedges whenever the class balance
# differs from the expected one.  Reindexing pins the order to the labels;
# fill_value=0 keeps one slot per label even if a group has no rows.
sex_counts = (
    census['sex'].value_counts().reindex(sex_order, fill_value=0).to_numpy()
)
income_counts = np.concatenate([
    census.loc[census['sex'] == sex_code, 'classification']
    .value_counts()
    .reindex(class_order, fill_value=0)
    .to_numpy()
    for sex_code in sex_order
])

plt.figure(figsize=(8, 8))

# Outer ring (kept under the historical name `ins`).
ins = plt.pie(
    sex_counts,
    labels=["Male", "Female"],
    autopct="%0.2f",
    pctdistance=0.80,
    startangle=90,
)

# Inner ring (kept under the historical name `out`): the income split of
# each sex, in the same order as the outer ring so the wedges line up.
out = plt.pie(
    income_counts,
    labels=["<50k", ">50k", "<50k", ">50k"],
    autopct="%0.2f",
    radius=0.7,
    pctdistance=0.80,
    startangle=90,
)

# A white disc over the centre turns the stacked pies into a donut.
hole = plt.Circle((0, 0), 0.4, color='white', linewidth=0)
fig = plt.gcf()
fig.gca().add_artist(hole)
plt.show()
# --- Crime data: load and take a quick look -------------------------------
calls = pd.read_csv('crimes_dataset.csv')

# Bare expressions: they only display output in an interactive session.
calls.sample(n=5)   # five random rows
calls.shape         # (row count, column count)
import folium
import folium.plugins # The Folium Javascript Map Library
# --- Heat map of all crime locations --------------------------------------
SF_COORDINATES = (37.87, -122.28)  # (latitude, longitude) the map is centred on

# Keep only rows where both coordinates are present after float conversion.
coords = calls[['Lat', 'Lon']].astype('float').dropna()
locs = coords.to_numpy()

heatmap = folium.plugins.HeatMap(locs.tolist(), radius=10)

sf_map = folium.Map(location=SF_COORDINATES, zoom_start=13)
sf_map.add_child(heatmap)
# --- Clustered markers for the most recent records ------------------------
# Take the last 5000 rows, then drop any that lack a coordinate or a legend.
recent = calls[['Lat', 'Lon', 'CVLEGEND']].tail(5000).dropna()

cluster = folium.plugins.MarkerCluster()
for lat, lon, legend in zip(recent['Lat'], recent['Lon'], recent['CVLEGEND']):
    marker = folium.Marker([float(lat), float(lon)], popup=legend)
    cluster.add_child(marker)

# Start from a fresh map so that it carries only the marker cluster.
sf_map = folium.Map(location=SF_COORDINATES, zoom_start=13)
sf_map.add_child(cluster)
sf_map